
/* READ ME:
This program reads in IV, OLS, DID results and saves them in an intermediary file that will be used to create tables in the paper and appendices.

There are 8 "steps" in this program.
1. Define programs to clean/ standardize everything
2. Import Cook County IV OLS Data
3. Import NY IV OLS Data
4. Combine Cook and NY IV OLS Data
5. Import Cook County DID Data
6. Import NY DID Data
7. Combine Cook and NY DID Data
8. Combine Cook and NY IV OLS AND DID Data and final cleaning

*/

clear all

// Define the project root in the global macro "folder" before this point,
// either on the line below or in a calling do-file, e.g.
//   global folder "C:\path\to\project"
// (-clear all- does not drop global macros, so a value set by a caller survives.)

// Fail fast with a clear message instead of letting every path below silently
// resolve to "\1_Inputs", "\2_Intermediate", ... when the root is undefined.
if `"$folder"' == "" {
	display as error "global macro folder is not defined; set it to the project root before running this do-file"
	exit 198
}

// Sub-folders used throughout the rest of the file.
global Inputs "$folder\1_Inputs"
global Intermediate "$folder\2_Intermediate"
global Outputs "$folder\3_Outputs"

********************************************************************************
**# STEP 1: DEFINE PROGRAMS TO CLEAN/ STANDARDIZE EVERYTHING
********************************************************************************

capture program drop std_subgroups
program define std_subgroups
	// Standardise the string variable -group- in place.
	// Takes no arguments; expects -group- to exist in the data in memory.
	// Labels not listed below are left unchanged.

	// Remove the "Pop: " prefix wherever it occurs.
	replace group = subinstr(group, "Pop: ", "", .)

	// Collapse the alternative spellings of each subgroup onto one label.
	replace group = "all"      if group == "all=1" | group == "ALL" | group == "All"
	replace group = "nonblack" if group == "black=0" | group == "NOTBLACK" | group == "NONBLACK" | group == "notblack"
	replace group = "black"    if group == "black=1" | group == "BLACK"
	replace group = "male"     if group == "female=0" | group == "fem=0" | group == "MALE"
	replace group = "female"   if group == "female=1" | group == "fem=1" | group == "FEMALE"

	// great_recession=0 / great_recession=1 become gr0 / gr1.
	forvalues g = 0/1 {
		replace group = "gr`g'" if group == "great_recession=`g'"
	}
end

capture program drop std_time_periods
program define std_time_periods
	// Standardise the string variable -time_period- in place (IV/OLS inputs).
	// Takes no arguments; expects -time_period- to exist in the data in memory.
	//
	// The variable is referenced by its full name throughout. The earlier version
	// assigned to the abbreviation -time- while testing -time_period-, which only
	// works while variable abbreviation is on and no variable called -time- exists.
	//
	// Each replace tests the already-updated value, so e.g. "1_4" first becomes
	// "Q1_4" and is then mapped to "Q01_04" by the following line.

	replace time_period = subinstr(time_period, "Outcome window: ", "", .)

	// Values containing neither "Y" nor "Q" get a "Q" prefix.
	// Note: an empty time_period also satisfies this condition and becomes "Q".
	replace time_period = "Q" + time_period if !regexm(time_period, "Y|Q")

	// Post-period windows.
	replace time_period = "Q01_04" if inlist(time_period, "Q1_4", "Y1", "Y1_", "Y0_1", "Q01Q04")
	replace time_period = "Q05_08" if inlist(time_period, "Q5_8", "Y2", "Y2_", "Q05Q08")
	replace time_period = "Q09_16" if inlist(time_period, "Q9_16", "Q09Q16", "Y3_4")
	replace time_period = "Q17_24" if inlist(time_period, "Q17Q24", "Y5_6")
	replace time_period = "Q01_08" if inlist(time_period, "Y1_2")

	// Pre-period windows ("n" marks a negative quarter).
	replace time_period = "Qn8_n1" if inlist(time_period, "Qb1_8", "Qn8_01", "Qn8_n1", "Yn1_2", "Yn2_n1", "YL1_2_", "YL1_2")
	replace time_period = "Qn4_n1" if inlist(time_period, "Yn1", "YL1_", "YL1")
	// "YL2" added: every other lag label is accepted with and without the trailing
	// underscore, so a bare "YL2" previously passed through unstandardised.
	// NOTE(review): the target label is kept as "Qn8_n4" because later code may key
	// on it; if it denotes the second year before filing, "Qn8_n5" may have been intended.
	replace time_period = "Qn8_n4" if inlist(time_period, "Yn2", "YL2_", "YL2")
end

capture program drop std_timeperiods_did
program define std_timeperiods_did
	// Standardise DiD time-window labels in place.
	// Takes no arguments. The variable is referenced as -time-, so it resolves to a
	// variable of that name or, through variable abbreviation, to the single
	// variable whose name begins with "time" (e.g. time_period).
	// Labels not listed below are left unchanged.
	replace time = "Qn8_n1" if inlist(time, "Qn8_01", "Qb1_8")
	replace time = "Q01_04" if inlist(time, "Y1", "Q1_4")
	replace time = "Q05_08" if inlist(time, "Y2", "Q5_8")
	replace time = "Q09_16" if inlist(time, "Y3_4", "Q9_16")
end

capture program drop std_specs
program define std_specs
	// Standardise the specification labels.
	// Takes no arguments. Reads the variable referenced as -spec- (an exact match or
	// an unambiguous abbreviation such as -specification-), builds the recoded copy,
	// drops the source variable and leaves the result in a variable named
	// -specification-. Labels not listed below are carried over unchanged.

	// Every condition tests the ORIGINAL label, so the recodes never cascade
	// (e.g. "NOLS" -> "ols" is not subsequently turned into "ldv").
	generate new_spec = spec

	replace new_spec = "ols"  if spec == "Spec: OLS baseline" | spec == "NOLS"
	replace new_spec = "ldv"  if spec == "Spec: OLS baseline w/ additional controls" | spec == "OLS more controls" | spec == "ols" | spec == "OLS" | spec == "OLS_AC"
	replace new_spec = "rf"   if regexm(spec, "RF: stringency more controls") | spec == "Spec: RF: stringency w/ additional controls" | spec == "RF" | spec == "rf" | spec == "RF_AC"
	replace new_spec = "ivac" if spec == "Spec: IV baseline w/ additional controls" | spec == "IV more controls" | spec == "iv more controls" | spec == "IV" | spec == "iv" | spec == "IV_AC"
	replace new_spec = "iv11" if inlist(spec, "Spec: IV baseline w/ 1 instrument and control for amount stringency", "IV_AC11")

	drop spec
	rename new_spec specification
end


capture program drop std_outcomes
program define std_outcomes
	// Recode the string variable -outcome- in place to short outcome codes.
	// Takes no arguments; expects -outcome- to exist in the data in memory.
	// Labels that match nothing below are only stripped of "Outcome: ",
	// have "_" turned into spaces and are lower-cased.

	// Pass 1: case-sensitive matches on the raw labels.
	forvalues yr = 1/4 {
		replace outcome = "fute" if outcome == "Future eviction at same addr (`yr' year)"
		replace outcome = "futc" if outcome == "Future case at same addr (`yr' year)"
	}
	forvalues yr = 1/2 {
		replace outcome = "lock" if outcome == "Sheriff completed eviction order (`yr' yr)"
	}
	replace outcome = "judg" if outcome == "Judgment amount"

	// Pass 2: normalise what is left so the matches below are against lower-case text.
	replace outcome = subinstr(outcome, "Outcome: ", "", .)
	replace outcome = subinstr(outcome, "_", " ", .)
	replace outcome = lower(outcome)

	// Pass 3: map the normalised labels to their codes.
	replace outcome = "an13" if inlist(outcome, "anyout13states")
	replace outcome = "auto" if inlist(outcome, "any auto loan or lease", "auto loan")
	replace outcome = "deli" if inlist(outcome, "total balance: collections and delinquencies", "total bal. delinquencies or collections", "delin acc bal", "deliquent accts bal")
	replace outcome = "cred" if inlist(outcome, "credit score", "credit")
	replace outcome = "earn" if inlist(outcome, "earnings")
	replace outcome = "empl" if inlist(outcome, "employed", "anyearn allst", "emp")
	replace outcome = "eshu" if inlist(outcome, "em shelter")
	replace outcome = "inde" if inlist(outcome, "financial health index", "index financial health")
	replace outcome = "home" if inlist(outcome, "shelter")
	replace outcome = "move" if inlist(outcome, "not at eviction address", "not at evic", "move")
	replace outcome = "movo" if inlist(outcome, "not at evic o unobs")
	replace outcome = "numa" if inlist(outcome, "numloans", "number of payday accounts")
	replace outcome = "numi" if inlist(outcome, "numinq", "number of payday inquiries")
	replace outcome = "paya" if inlist(outcome, "anypaydayloan", "any payday account (*100)")
	replace outcome = "payi" if inlist(outcome, "any payday inquiry (*100)", "anyinquiry")
	replace outcome = "povr" if inlist(outcome, "avg marf all hh pov rate", "avg pov rate", "nhoodpov")
	replace outcome = "stat" if inlist(outcome, "state")
	replace outcome = "zero" if inlist(outcome, "no open revolving account", "zero revolving", "zero rev")
	replace outcome = "ment" if inlist(outcome, "num mental health")
	replace outcome = "hosp" if inlist(outcome, "num hospital visits")
	replace outcome = "emer" if inlist(outcome, "num emergency visits")
end

********************************************************************************
********************************************************************************
**# STEP 2: IMPORT COOK COUNTY IV OLS DATA

// Load the two Cook County Experian IV/OLS result files and stack them.
// NOTE(review): the "_gr" file presumably holds the great-recession subgroups - confirm.
import delimited "${Inputs}\Cook_experian_ivols.csv", clear
tempfile cook_file_credit
save `cook_file_credit'

import delimited "${Inputs}\Cook_experian_ivols_gr.csv", clear
append using `cook_file_credit'

	// -label- is a ", "-separated string; its first four pieces become the key variables.
	split label, parse(", ")
	rename (label1 label2 label3 label4) (group outcome time_period spec)
	// Drops the original -label- and any pieces beyond the fourth.
	drop label*
	
	// std_specs replaces -spec- with -specification-; the later references to
	// -spec- and -time- in this block rely on variable abbreviation.
	std_subgroups 
	std_outcomes
	std_time_periods
	std_specs
	
	// Rows without a specification are split off into a summary-statistics file
	// keyed on group/outcome/time_period.
	// NOTE(review): presumably these rows carry baseline means/SDs rather than
	// regression estimates - confirm against the CSV.
	preserve 
		keep if mi(spec)
		keep y_base* sd_* group outcome time
		// Discard rows in which all five statistics are missing.
		drop if mi(y_base_mean) & mi(y_base_sd) & mi(y_base_n) & mi(sd_y_all) & mi(sd_evict_all)
		// NOTE(review): y_base_sd is renamed to se_non_evic_chi, i.e. an "sd" input
		// ends up under an "se" name - confirm this is intended.
		rename (y_base_mean y_base_sd y_base_n sd_y_all sd_evict_all) ///
		(mean_non_evic_chi se_non_evic_chi non_evic_n_chi sd_outcome_full_chi sd_frac_evic_chi)
		
		tempfile stats
		save `stats'
	restore 
	
	// Rows with a specification: keep the estimates and give them *_chi names.
	keep if !mi(spec)
	keep *_evict *amount n_obs group outcome time spec
	rename (b_evict se_evict b_judgmentamount se_judgmentamount n_obs) ///
	(evic_coeff_chi evic_se_chi judge_amt_coeff_chi judge_amt_se_chi obs_chi)
	
	// Attach the summary statistics to every specification of the same
	// group/outcome/time_period (many estimate rows to one statistics row).
	merge m:1 group outcome time_period using `stats'
	drop _m 
	
	// Re-declaring the tempfile points the local at a fresh temporary file.
	tempfile cook_file_credit 
	save `cook_file_credit'
 
	

// Start the Cook County "main" stack with the LEHD great-recession file.
import delimited "${Inputs}\Cook_lehd_ivols_gr.csv", clear	
	
	// groups are great recession or not great recession
	replace group = "gr1" if group=="gr=1"
	replace group = "gr0" if group=="gr=0"
	
	// Align variable names with the files appended in the loop below.
	// NOTE(review): assumes those files already use the evic* / *_outcome_non_evicted
	// naming - confirm against the CSV headers.
	rename evict* evic*
	rename *_non_evict *_outcome_non_evicted
	
	tempfile cook_file_main
	save `cook_file_main'
	
// Import each remaining source, rename its -population- column to -group-, append
// everything accumulated so far and save the result as the new running stack.
// Re-declaring the tempfile inside the loop gives the local a fresh temporary file.
foreach jj in lehd marf hmis {
	import delimited "${Inputs}\Cook_`jj'_ivols.csv", clear 	
	rename population group
	append using `cook_file_main'
	tempfile cook_file_main
	save `cook_file_main'
}	

	// Standardise the key variables on the stacked data.
	std_specs
	std_outcomes
	std_subgroups
	std_time_periods

	// Shorten names, tag every variable with the _chi suffix, then restore the four
	// key variables to their unsuffixed names so they can serve as merge keys.
	rename *non_evicted* *non_evic*
	rename *full_sample *full
	rename * *_chi 
	rename (outcome_chi group_chi specification_chi time_period_chi) ///
	(outcome group specification time_period)
	// NOTE(review): sd_outcome_non_evic_chi is renamed to se_non_evic_chi, i.e. an
	// "sd" input ends up under an "se" name - confirm this is intended.
	rename (mean_outcome_non_evic_chi sd_outcome_non_evic_chi sd_evic_full_chi) ///
	(mean_non_evic_chi se_non_evic_chi sd_frac_evic_chi)
	
	// Every variable except the four keys now ends in _chi, so -data- can only
	// resolve through variable abbreviation (presumably to data_chi - confirm).
	drop data
	
	tempfile cook_file_main
	save `cook_file_main'
		

// Cook County intermediate outcomes; the time window is embedded in the outcome label.
use "${Inputs}\Cook_intermediate_outcomes_ivols.dta", clear 

	// Derive time_period from the label before std_outcomes collapses the labels.
	// (-time- below abbreviates -time_period-.)
	// NOTE(review): std_outcomes also recognises "(3 year)" and "(4 year)" labels, but
	// no time_period is assigned for them here; such rows would keep an empty
	// time_period and share the same key after recoding - confirm whether the input
	// contains them.
	gen 	time_period = "Q01_04" if regexm(outcome, "1 year|1 yr")
	replace time = "Q05_08" if regexm(outcome, "2 year|2 yr")

	std_outcomes 
	std_specs
	
	// Give the estimates the same *_chi names used by the other Cook County files.
	rename (beta_est beta_se baseline_mean sd_of_mean n_obs) ///
	(evic_coeff_chi evic_se_chi mean_non_evic_chi se_non_evic_chi obs_chi)
	
// Stack all Cook County IV/OLS results into one file for the merge in Step 4.
append using `cook_file_credit'
append using `cook_file_main'

compress 

tempfile cook_files
save `cook_files'

********************************************************************************
********************************************************************************
**# STEP 3: IMPORT NY IV OLS DATA

// Stack the NY labor, homelessness, moves and neighborhood result files
// (full sample and subgroups).
use "${Inputs}\NY_labor_ivols.dta", clear
append using "${Inputs}\NY_labor_ivols_subgroups.dta"
append using "${Inputs}\NY_homelessness_ivols.dta"
append using "${Inputs}\NY_homelessness_ivols_subgroups.dta"
append using "${Inputs}\NY_moves_ivols.dta"
append using "${Inputs}\NY_moves_ivols_subgroups.dta"
append using "${Inputs}\NY_nbhood_ivols.dta"
append using "${Inputs}\NY_nbhood_ivols_subgroups.dta"

	replace outcome = upper(outcome)
	
	// In these files the time window is embedded in the outcome name.
	// Quarter windows: split on "Q"; the 2nd and 3rd pieces form "Q<a>_<b>",
	// and the 1st piece is kept as the outcome name.
	// NOTE(review): any upper-cased outcome name that itself contains "Q" (or "Y",
	// below) would also be split at that letter - confirm none of these files has one.
	split outcome, parse("Q")
	gen time = "Q" + outcome2 + "_" + outcome3 if !mi(outcome2) & !mi(outcome3)
	drop outcome outcome2 outcome3 
	rename outcome1 outcome
	
	// Year windows: split on "Y" and remove "EAR" from the piece after it, so a
	// piece "EAR1" gives "Y1"; a 3rd piece, when present, is added after "_".
	// Only rows without a quarter window are filled.
	split outcome, parse("Y")
	replace time = "Y" + subinstr(outcome2, "EAR", "", .) if !mi(outcome2) & mi(outcome3) & mi(time)
	replace time = "Y" + subinstr(outcome2, "EAR", "", .) + "_" + outcome3 if !mi(outcome3) & mi(time)
	drop outcome outcome2 outcome3 
	rename outcome1 outcome
	
// Credit and hospital results are appended after the parsing above,
// so their outcome names are not split.
append using "${Inputs}\NY_credit_ivols.dta"	
append using "${Inputs}\NY_hospital_ivols.dta"	

	// NOTE(review): presumably a variable named exactly -out- arrives with the
	// appended files; -outcome- is used below, so -out- cannot be resolving to it - confirm.
	drop out 
	// Rows without a group label are treated as the full sample.
	replace group = "all" if mi(group)
	 
	// For LMOVE, "Y1_2" is recoded to the pre-period window Qn8_n1 and the outcome
	// itself becomes "move".
	replace time = "Qn8_n1" if time=="Y1_2" & outcome=="LMOVE"
	replace outcome = "move" if outcome=="LMOVE"
	 
	rename time time_period
	std_specs
	std_outcomes 
	std_time_periods

// Lockout and future-case results are appended AFTER the standardising programs
// have run, so they are recoded by hand below.
append using "${Inputs}\NY_lockout_ivols.dta"
append using "${Inputs}\NY_futurecase_ivols.dta"

	// Fill a missing group from estsamp, then map the recession labels to gr0/gr1.
	replace group = estsamp if mi(group) & !mi(estsamp)
	replace group = "gr0" if group=="NOTRECESS"
	replace group = "gr1" if group=="RECESSION"
	drop estsamp
	
	// Time windows for these outcomes come from the outcome name.
	replace time_period = "Q01_04" if inlist(outcome, "futureviction1yr", "Y1LOCKOUT")
	replace time_period = "Q05_08" if inlist(outcome, "futureviction2yr", "Y2LOCKOUT")
	
	replace outcome = "lock" if inlist(outcome, "Y1LOCKOUT", "Y2LOCKOUT")
	replace outcome = "fute" if inlist(outcome, "futureviction1yr", "futureviction2yr")
	
	// Hand-recode specifications; "IV" and "OLS" are recoded only for lock/fute rows.
	// NOTE(review): -spec- presumably abbreviates -specification- here (created by
	// std_specs above) - confirm the appended files carry no separate -spec- variable.
	replace spec = "rf" if spec=="RF"
	replace spec = "ivac" if spec=="IV" & inlist(outcome, "lock", "fute")
	replace spec = "ldv" if spec=="OLS" & inlist(outcome, "lock", "fute")
	std_subgroups
 
	// separate out means and sds, make them their own columns
	preserve 
		keep if inlist(specification, "EMEAN", "EMEANSD", "GMEAN", "GMEANSD", "MEANSD", "meansd")
		replace specification = "MEANSD" if specification=="meansd"
		drop N
		// One row per group/outcome/time_period, with beta/se spread across columns
		// named after the specification label.
		reshape wide beta se, i(group outcome time) j(specification) string 
		rename (betaEMEAN seEMEAN betaGMEAN seGMEAN betaMEANSD seMEANSD) (frac_evic_ny sd_frac_evic_ny mean_outcome_full_ny sd_outcome_full_ny mean_non_evic_ny se_non_evic_ny)
		
		tempfile nyc_summary_stats
		save `nyc_summary_stats'
	restore 
		
	// Remove the summary rows from the estimates, then merge them back as columns.
	drop if inlist(specification, "EMEAN", "EMEANSD", "GMEAN", "GMEANSD", "MEANSD", "meansd")

	rename (beta se N) (evic_coeff_ny evic_se_ny obs_ny)
	merge m:1 group outcome time_period using `nyc_summary_stats'
	drop _m 

	compress
 
********************************************************************************	
********************************************************************************
**# STEP 4: COMBINE COOK COUNTY AND NY IV OLS DATA	

// Pair the NY rows in memory with the Cook County rows on the four key variables.
// Rows found in only one site are kept; their other-site columns stay missing.
merge 1:1 outcome group time_period specification using `cook_files'

sort outcome group specification time_period
order outcome group specification time_period *chi *ny
drop _m
 
drop if outcome=="future eviction case at same address"  
 
// Pooled sample size (missing unless both sites are present).
// Stored as double: -generate- defaults to float, which cannot hold every integer
// above 16,777,216, so large counts would be silently rounded.
gen double obs_both = obs_chi + obs_ny
 
// Pooled point estimates: simple average of the two sites.
foreach i in evic_coeff mean_non_evic {
	gen `i'_both = (`i'_ny + `i'_chi) / 2
}

// Pooled dispersion: sqrt(0.25*a^2 + 0.25*b^2), which is the standard error of an
// equal-weight average of two estimates when they are treated as independent.
foreach i in evic_se sd_frac_evic sd_outcome_full se_non_evic {
	gen `i'_both = sqrt(((0.25) * (`i'_chi^2)) + ((0.25) * (`i'_ny^2)))
}

//reshape and add geography variable for ny, chi or both	
reshape long evic_coeff evic_se judge_amt_coeff judge_amt_se obs frac_evic sd_frac_evic mean_outcome_full  sd_outcome_full mean_non_evic se_non_evic non_evic_n, i(outcome group specification time_period) j(geography) string

// The suffixes captured by reshape are "_chi", "_ny", "_both"; strip the underscore.
replace geography = subinstr(geography, "_", "", .)

//delete unnecessary rows
drop if mi(evic_coeff) & mi(evic_se) & mi(obs) & mi(frac_evic)

// add pvalue + stars for evic coefficient 
// Two-sided p-value from the normal distribution.
gen zscore = evic_coeff / evic_se 
gen pvalue=2*(1-normal(abs(zscore))) 

// A missing pvalue compares as larger than any number, so it receives no stars.
gen 	stars = "***" if pvalue < 0.01
replace stars = "**" if pvalue < 0.05 & mi(stars)
replace stars = "*" if pvalue < 0.1 & mi(stars)

compress
save "${Intermediate}\Combined IV OLS.dta", replace

********************************************************************************
********************************************************************************
**# STEP 5: IMPORT COOK COUNTY DID DATA

// Cook County Experian DiD results.
import delimited "${Inputs}\Cook_experian_did.csv", clear
	
	// -label- is a ", "-separated string; its first three pieces become the key variables.
	split label, parse(", ")
	rename (label1 label2 label3) (group outcome time_period)
	drop label*
	
	std_outcomes
	std_subgroups 

	// Strip the prefix and put "Q" in front of every window, then standardise.
	// (-time- abbreviates -time_period- here.)
	replace time_period = subinstr(time_period, "Outcome period: ", "", .)
	replace time = "Q" + time_period
	std_timeperiods_did
	
	gen specification = "DiD"
	
	// n_persons must be entirely missing in this file; the count used is person-quarters.
	assert mi(n_persons)
	// Stored as double: -generate- defaults to float, which cannot hold every integer
	// above 16,777,216, so large counts would be silently rounded.
	gen double obs = p_person_quarters
	drop p_person_quarters n_persons

	// Keep only the interaction (evict x outcome-period) estimate.
	drop b_evict se_evict
	rename (b_evictxout se_evictxout non_evict_base) ///
	(evicxout_coeff evicxout_se mean_non_evic)
	// Tag everything with _chi, then restore the four key variables.
	rename * *_chi 
	rename (outcome_chi group_chi specification_chi time_period_chi) (outcome group specification time_period)
	
	tempfile did3
	save `did3'
	
// Remaining Cook County DiD sources: standardise each one, then append the running stack.
foreach jj in lehd marf hmis {
	import delimited "${Inputs}\Cook_`jj'_did.csv", clear 	
	
	// Create whichever count column the file lacks (-capture- ignores the error
	// when the column already exists).
	cap gen n_person_years = .
	cap gen n_person_quarters = .
	
	rename population group
	
	std_subgroups
	std_outcomes	
	std_timeperiods_did
		
	// Person-quarters take precedence over person-years when both are present.
	// Double for the same reason as above: float rounds large integer counts.
	gen double obs = n_person_years
	replace obs = n_person_quarters if !mi(n_person_quarters)

	drop coeff_evict se_evict
	rename (coeff_evictxout se_evictxout mean_non_evict_base) ///
	(evicxout_coeff evicxout_se mean_non_evic)
	rename * *_chi 
	rename (outcome_chi group_chi specification_chi time_period_chi) (outcome group specification time_period)
	
	// The count columns now carry the _chi suffix; the wildcard still matches them.
	drop n_person*
	
	append using `did3'
	// Re-declaring the tempfile points the local at a fresh temporary file.
	tempfile did3
	save `did3'
}		
	
// From here on the interaction estimate is the "evic" estimate
// (evicxout_coeff_chi -> evic_coeff_chi, evicxout_se_chi -> evic_se_chi).
rename evicxout_* evic_*		
compress		
	
tempfile chicago
save `chicago'
 

********************************************************************************
********************************************************************************
**# STEP 6: IMPORT NY DID DATA

// Stack the NY DiD result files.
use "${Inputs}\NY_homelessness_did.dta", clear
append using "${Inputs}\NY_hospital_did.dta"
append using "${Inputs}\NY_labor_did.dta"
append using "${Inputs}\NY_moves_did.dta"
append using "${Inputs}\NY_credit_did.dta"

	replace outcome = "inde" if outcome=="finindex"
	// Rows without an outcome take it from -label-.
	replace outcome = label if mi(outcome)
	// Drop every row whose time label contains a "-".
	// NOTE(review): presumably these are pre-period or event-time rows - confirm.
	drop if regexm(time, "-")
	drop label spec person persons
	
	std_subgroups
	std_outcomes
	  
	// Turn every "Q" into "_" and then the first "_" back into "Q",
	// e.g. "Q1Q4" -> "_1_4" -> "Q1_4".
	replace time = subinstr(time, "Q", "_", .)
	replace time = subinstr(time, "_", "Q", 1)
	std_timeperiods_did
	
	gen specification = "DiD"
	rename time time_period
	rename (beta se N) (evic_coeff_ny evic_se_ny obs_ny)
	compress

// Pair the NY rows with the Cook County DiD rows on the four key variables.
merge 1:1 outcome time_period group specification using `chicago'
 
sort outcome group specification time_period
order outcome group specification time_period *chi *ny
drop _m


********************************************************************************
********************************************************************************
**# STEP 7: COMBINE COOK COUNTY AND NY DID DATA

// Pooled sample size (missing unless both sites are present).
// Stored as double: -generate- defaults to float, which cannot hold every integer
// above 16,777,216, so large person-quarter counts would be silently rounded.
gen double obs_both = obs_chi + obs_ny
 
// Pooled point estimate: simple average of the two sites.
foreach i in evic_coeff {
	gen `i'_both = (`i'_ny + `i'_chi) / 2
}

// Pooled standard error: sqrt(0.25*a^2 + 0.25*b^2), which is the standard error of an
// equal-weight average of two estimates when they are treated as independent.
foreach i in evic_se {
	gen `i'_both = sqrt(((0.25) * (`i'_chi^2)) + ((0.25) * (`i'_ny^2)))
}

//reshape and add geography variable for ny, chi or both	
reshape long evic_coeff evic_se evicxout_coeff evicxout_se mean_non_evic obs se_non_evic, i(outcome group specification time_period) j(geography) string

// The suffixes captured by reshape are "_chi", "_ny", "_both"; strip the underscore.
replace geography = subinstr(geography, "_", "", .)

//delete unnecessary rows
drop if mi(evic_coeff) & mi(evic_se) & mi(obs)

// add pvalue + stars for evic coefficient 
// Two-sided p-value from the normal distribution.
gen zscore = evic_coeff / evic_se 
gen pvalue=2*(1-normal(abs(zscore))) 

// A missing pvalue compares as larger than any number, so it receives no stars.
gen 	stars = "***" if pvalue < 0.01
replace stars = "**" if pvalue < 0.05 & mi(stars)
replace stars = "*" if pvalue < 0.1 & mi(stars)

order spec outcome group time_period geography evic_coeff evic_se zscore pvalue stars obs mean_non_evic se_non_evic evicxout_coeff evicxout_se

sort specification outcome group time_period geography 

compress
save "${Intermediate}\Combined DiD.dta", replace


********************************************************************************
**# STEP 8: COMBINE IV OLS WITH DID, AND FINAL CLEANING - SHORTEN STRINGS

// Stack the IV/OLS and DiD results, shorten every key to a compact code, format
// the numbers as display strings and save one row per (keys, item).
use "${Intermediate}\Combined IV OLS.dta", clear

append using "${Intermediate}\Combined DiD.dta"

	// Every outcome must already be a four-character code.
	assert strlen(outcome)==4
	
	// One-letter group codes; gr0/gr1 (great recession) keep their three characters.
	replace group = "a" if group=="all"
	replace group = "b" if group=="black"
	replace group = "f" if group=="female"
	replace group = "m" if group=="male"		
	replace group = "n" if group=="nonblack"
	drop if group=="nocase" | group=="prevcase"
	assert strlen(group)==1 if group!="gr1" & group!="gr0" // great recession years
	
	// Specification codes: lower-case, underscores removed, plus outcome-specific relabels.
	rename specification spec 
	replace spec = lower(spec)
	replace spec = "iv11" if spec=="iv_ac11"
	replace spec = "iv22" if spec=="iv_ac22"
	replace spec = subinstr(spec, "_", "", .)
	replace spec = "ldv" if spec=="olsac"
	replace spec = "ols" if spec=="ldv" & inlist(outcome, "ment", "emer", "hosp")
	drop if spec=="meansd" | spec=="emean" | spec=="gmean" | spec=="cw"
	replace spec = "ivac" if spec=="iv" & inlist(outcome, "mort", "hosp", "ment", "emer")
	assert strlen(spec) < 6
	
	// Time codes: "Q01_04" -> "q0104".
	rename time_period time 
	replace time = subinstr(time, "Q", "q", .)
	replace time = subinstr(time, "_", "", .)
	assert strlen(time) < 6
	
	// One-letter geography codes.
	rename geography geo
	replace geo = "b" if geo=="both"
	replace geo = "c" if geo=="chi"
	replace geo = "n" if geo=="ny"
	
	** KEEP ONLY THE FOLLOWING VARIABLES FOR THE TABLES: 
	** Evic coeff, Evic SE, Obs, Mean Non-evic and SE Non-evic
	keep outcome group spec time geo evic_coeff evic_se obs mean_non_evic ///
	se_non_evic stars
	
	** Three string renderings of each number (the suffix is only a tag):
	**   suffix 2 = 3 decimals, suffix 3 = 0 decimals, suffix 4 = 2 decimals,
	** all with comma thousands separators.
	foreach i in evic_coeff evic_se mean_non_evic se_non_evic {
		gen `i'2 = string(`i', "%10.3fc")
		gen `i'3 = string(`i', "%12.0fc")
		gen `i'4 = string(`i', "%12.2fc")
	}

	* round eviction coeff
	* Default rule: 3 decimals when the integer part has at most two digits,
	* otherwise 0 decimals. The decimal point is located in the 3-decimal string
	* (evic_coeff2). The earlier code searched the 0-decimal string (evic_coeff3),
	* which never contains ".", so the test was always true and the 0-decimal
	* fallback below could never be reached.
	gen 	value_co = evic_coeff2 + stars if strpos(evic_coeff2, ".") < 4
	* Negative numbers carry a leading "-", so allow one more character before the ".".
	replace value_co = evic_coeff2 + stars if strpos(evic_coeff2, ".") < 5 & strpos(evic_coeff2, "-") == 1
	replace value_co = evic_coeff3 + stars if mi(value_co)
	* Outcome-specific overrides: always 0 decimals / always 2 decimals.
	replace value_co = evic_coeff3 + stars if inlist(outcome, "earn", "totb", "deli")
	replace value_co = evic_coeff4 + stars if inlist(outcome, "cred", "payi")
	
	* round eviction se
	* The standard error uses whichever rendering was chosen for its coefficient.
	gen 	value_se = "(" + evic_se2 + ")" if value_co == (evic_coeff2 + stars)
	replace value_se = "(" + evic_se3 + ")" if value_co == (evic_coeff3 + stars)
	replace value_se = "(" + evic_se4 + ")" if value_co == (evic_coeff4 + stars)
	
	* round means
	* 0 decimals when the 0-decimal string contains a thousands comma,
	* otherwise 3 decimals; cred/payi always 2 decimals.
	gen 	value_m = mean_non_evic3 if regexm(mean_non_evic3, ",") 
	replace value_m = mean_non_evic2 if mi(value_m) & mean_non_evic2 != "."
	replace value_m = mean_non_evic4 if inlist(outcome, "cred", "payi")
	
	* round mean se
	gen 	value_mse = "(" + se_non_evic2 + ")" if value_m == mean_non_evic2
	replace value_mse = "(" + se_non_evic3 + ")" if value_m == mean_non_evic3
	replace value_mse = "(" + se_non_evic4 + ")" if value_m == mean_non_evic4

	* round obs 
	gen 	value_obs = string(obs, "%12.0fc")
	
	// Go long: one row per item (co, se, m, mse, obs) with its display string in -value-.
	drop evic_coeff* evic_se* obs stars mean_non_evic* se_non_evic*
	reshape long value, i(outcome group spec time geo) j(item) string
	replace item = subinstr(item, "_", "", .)
	
	// Formatted missing values come out as "." or "(.)"; blank them and drop those rows.
	replace value = "" if value=="."
	replace value = "" if value=="(.)"
	drop if mi(value)
	
	compress 
save "${Intermediate}\Combined for forloops.dta", replace 

